### PLOTS OF PRE-EXPOSURE TRENDS IN VIEWING

library(tidyverse)
library(magrittr)
library(data.table)
library(lubridate)
library(hms)
library(ggthemes)
theme_set(theme_few())

### SET WORKING DIRECTORY HERE ###
path_to_archive <- "replication/"
data_dir <- paste0(path_to_archive, "data/")
setwd(data_dir)
plot_dir <- paste0(path_to_archive, "plots/")
tables_dir <- paste0(path_to_archive, "tables/")

### FUNCTION DEFINITIONS FOR WEIGHTING SCHEMES
diff2 <- function(x) x[seq(1,length(x)-1,2)] - x[seq(2,length(x),2)]
inv_var_weight <- function(i,d) {
	weighted.mean(d[i, est], w=sqrt(d[i, n_t] * d[i, n_c]))
}

total_size_weight <- function(i, d) {
	weighted.mean(d[i,est], w=d[i,n_t] + d[i,n_c])
}

simple_avg <- function(i,d){
	mean(d[i,est])
}

dd_wide <- readRDS(paste0(data_dir, "all_dd_data.rds"))
s_cols <- grep("s_p", colnames(dd_wide), value=T)
all_controls <- dd_wide[group!="T", map(.SD, sum), .SDcols = s_cols, by = .(dma_code, channel, affiliate, program, ad_time_utc, n_t, n_c1, n_c2, n_c12)] %>%
	.[,group:="C"]

dd_wide %<>% 
	rbind(all_controls) %>%
	.[,n_c := n_c1 + n_c2 + n_c12] %>%
	.[group=="C", (s_cols) := map(.SD, ~ . / n_c), .SDcols = s_cols] %>% 
	.[group=="C1", (s_cols) := map(.SD, ~ . / n_c1), .SDcols = s_cols] %>% 
	.[group=="C2", (s_cols) := map(.SD, ~ . / n_c2), .SDcols = s_cols] %>% 
	.[group=="C1+C2", (s_cols) := map(.SD, ~ . / n_c12), .SDcols = s_cols] %>% 
	.[group=="T", (s_cols) := map(.SD, ~ . / n_t), .SDcols = s_cols] 



dd_long <- dd_wide[group %in% c("C","T")] %>%
	gather(key="hour", val="view_mean", dplyr::starts_with("s_pre"), dplyr::starts_with("s_post")) %>%
	as.data.table

dd_long[, `:=`(pre_post = sub("s_(pre|post)_(\\d+)", "\\1", hour),
			   hour = sub("s_(pre|post)_(\\d+)", "\\2", hour) %>% as.numeric)]

dd_long[,  `:=` (h_of_day = lubridate::hour(ad_time_utc + ifelse(pre_post=="post", -hours(1), hours(0)) + ifelse(pre_post=="pre", -1, 1) * hours(hour)),
				 show_min = as.numeric(ad_time_utc - floor_date(ad_time_utc, "30 minutes"), units="mins"),
				 tz = as.factor(substr(dma_code,1,1)))]

dd_long[tz != 7 & group %in% c("T","C"), resid_s := residuals(lm(view_mean ~ tz*factor(h_of_day), weights = case_when(group == "T" ~ n_t, group == "C" ~ n_c, group == "C1" ~ n_c1, group == "C2" ~ n_c2, group == "C1+C2" ~ n_c12)))] 


pre_trend_avg <- 
	dd_long[,.(view_mean = mean(view_mean/60, na.rm=T),
			   view_sd = sd(view_mean/60, na.rm=T),
			   n = .N), by = .(group, pre_post, hour)]
pre_trend_avg[,hour:=hour * ifelse(pre_post=="pre", -1, 1)]


### FIGURE 1(A)
ggplot(aes(x=hour, y=view_mean, group=group), data = pre_trend_avg[group%in% c("C","T")]) +
	geom_point(aes(colour=group, shape=group), size=2) +
	scale_colour_grey() +
	labs(x="Hour Window Relative to Ad Time", y = "Average Minutes by Group")

ggsave(filename=paste0(plot_dir, "pretrends.pdf"), height=6, width=10)

pre_trend_avg <- 
	dd_long[,.(view_mean = mean(resid_s/60, na.rm=T),
			   view_sd = sd(resid_s/60, na.rm=T),
			   n = .N), by = .(group, pre_post, hour)]
pre_trend_avg[,hour:=hour * ifelse(pre_post=="pre", -1, 1)]

### FIGURE 1(B)
ggplot(aes(x=hour, y=view_mean, group=group), data = pre_trend_avg[group%in% c("C","T")]) +
	geom_point(aes(colour=group, shape=group), size=2) +
	scale_colour_grey() +
	labs(x="Hour Window Relative to Ad Time", y = "Average Residual Minutes by Group")
ggsave(filename=paste0(plot_dir, "pretrends_residualized.pdf"), height=6, width=10)

difference_test <- 
	pre_trend_avg %>% copy %>%
	.[,.(t_c_diff = view_mean[group=="T"] - view_mean[group=="C"],
		 t_c_se = view_sd[group=="T"] / sqrt(n[group=="T"]) + view_sd[group=="C"] / sqrt(n[group=="C"])), by=.(hour)] %>%
	.[,hour_abs:=ifelse(hour>0, hour, 25+hour)] %>%
	.[,.(dd = diff(t_c_diff), dd_se=sum(t_c_se)), by =.(hour_abs)]

### FIGURE 2(A)
diffplot <- ggplot(aes(x=hour_abs, y=dd), data = difference_test ) + 
	geom_point() +
	geom_linerange(aes(ymin=dd - 1.96*dd_se, ymax=dd + 1.96*dd_se)) +
	geom_hline(yintercept=0) +
	scale_x_continuous(limits=c(0,24), breaks = 1:24) +
	scale_y_continuous(limits=c(-0.5,2.5), breaks=seq(-0.5,2.5,0.25)) +
	labs(x="Hour Window Relative to Ad Time", y = "DiD Estimate of Effect on News Viewing")

ggsave(filename=paste0(plot_dir, "pretrends_diffs.pdf"), height=6, width=10)
